********************************************************************************
** 	TITLE:		us1984-2016_polls		                                      ** 	
**	AUTHOR:	    Philippe Mongrain                                             **
**  DATE:	    October 2022 						                          **	
**  VERSION:	Stata 16					                                  **	
********************************************************************************

* Run under Stata 16.0 semantics

version 16.0

* Load the source data and restrict it to U.S. presidential elections
* with an election year of 1984 or later

use "LONG_MI_NATURE_20180111.dta", clear

keep if electionyr >= 1984 & country == "United States" & election == "Presidential"

* Display both date variables in the form "Nov 06, 1984" (display format only;
* the stored values are unchanged), then rename the election date

format polldate elecdate %tdMon_DD,_CCYY

rename elecdate electiondate

* Days from the poll to the election (positive when the poll precedes the
* election); rows dated on election day itself are removed

gen time = electiondate - polldate

drop if time == 0

* Retain only the variables used below, store the poll share under the name
* vote, and fix the column order

keep polldate electiondate electionyr party poll_ time

rename poll_ vote

order polldate electiondate electionyr time party vote

* Rank parties within each poll date and derive the poll margin
*
* Rows without a poll share cannot be ranked, so they are removed first.

drop if vote == .

* Order rows inside each poll date by descending vote share. The within-date
* order is stated in the by-prefix itself so it cannot be lost to a later
* re-sort, and party breaks ties so that repeated runs always pick the same
* leader (sort places tied observations in arbitrary order otherwise).
* NOTE(review): every row sharing a polldate is treated as one poll; if the
* data can hold several polls on the same date they are pooled here - confirm.

gen double negvote = -vote

bysort polldate (negvote party) : gen rank = _n

* Leading and second-placed party on each poll date; runnerup (and pollmar
* below) are missing when a date has only one row

by polldate : gen winner = party[1]
by polldate : gen runnerup = party[2]

* Generate poll margin: leader's share minus the runner-up's share

by polldate : gen pollmar = vote[1] - vote[2]

drop negvote

* Drop duplicates: keep one row per poll date (winner, runnerup and pollmar
* are constant within a date)

duplicates drop polldate, force

* Misleading poll
*
* misleading = 0 when the first- and second-ranked parties on the poll date
* match the pair expected for that election year; 1 for every other row in a
* listed year (including rows where winner or runnerup is missing); missing
* for any election year that is not listed.
* NOTE(review): the meaning of party codes 1 and 2 is not defined in this
* file - confirm against the codebook.

gen misleading = 1 if inlist(electionyr, 1984, 1988, 1992, 1996, 2000, 2004, 2008, 2012, 2016, 2020)

* Election years where party 2 is expected first and party 1 second

replace misleading = 0 if inlist(electionyr, 1984, 1988, 2004) & winner == 2 & runnerup == 1

* Election years where party 1 is expected first and party 2 second

replace misleading = 0 if inlist(electionyr, 1992, 1996, 2000, 2008, 2012, 2016, 2020) & winner == 1 & runnerup == 2

* Save
*
* Discard rows whose time is zero or missing, reduce the data to the four
* output variables, and write the result to disk, overwriting any existing
* file of the same name

keep if time != 0 & time != .

keep polldate pollmar misleading time

save "us1984-2016_polls.dta", replace